<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2016-2017 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Installation for MapR</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-section-numbering.css" type="text/css" />

    <script type="text/javascript">
      // Page-level settings object, declared with `var` at script top level so
      // that it is visible as a global to the scripts loaded after this block
      // (presumably doctools.js and friends -- verify against those scripts).
      // NOTE(review): URL_ROOT is empty even though this page references its
      // own static assets via "../"; confirm the empty root is intentional.
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT: "",
        VERSION: "6.1.1",
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: ".html",
        HAS_SOURCE: false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="Installation" href="index.html" />
    <link rel="next" title="Installation on Microsoft Azure HDInsight" href="azure-hdinsight.html" />
    <link rel="prev" title="Installation using Apache Ambari" href="ambari.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: admin-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-components.html"> CDAP Components</a></li>
<li class="toctree-l1"><a class="reference internal" href="../deployment-architectures.html"> Deployment Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../hadoop-compatibility.html"> Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-hadoop-compatibility.html"> CDAP and Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../system-requirements.html"> System Requirements</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> Installation</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="cloudera.html">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="emr.html">Amazon EMR</a></li>
<li class="toctree-l2"><a class="reference internal" href="ambari.html">Apache Ambari</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="azure-hdinsight.html">Microsoft Azure HDInsight</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html">Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="replication.html">Replication</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../incompatibilities.html"> Incompatibilities</a></li>
<li class="toctree-l1"><a class="reference internal" href="../upgrading/index.html"> Upgrading</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/cloudera.html">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/ambari.html">Apache Ambari</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/mapr.html">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/packages.html">Packages</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../security/index.html"> Security</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../security/perimeter-security.html">Perimeter Security</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/authorization.html">Authorization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/impersonation.html">Impersonation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/system-services.html">Enabling SSL for System Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/secure-storage.html">Secure Storage</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../operations/index.html"> Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../operations/logging.html"> Logging and Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/metrics.html"> Metrics</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/operations-dashboard.html"> Dashboard and Reports</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/preferences.html"> Preferences and Runtime Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/scaling-instances.html"> Scaling Instances</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/resource-guarantees.html"> Resource Guarantees in YARN</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/tx-maintenance.html"> Transaction Service Maintenance</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/cdap-ui.html"> CDAP UI</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../appendices/index.html"> Appendices</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-site.html"> Appendix: cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-security.html"> Appendix: cdap-security.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/minimal-cdap-site.html"> Appendix: Minimal cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/hbase-ddl-executor.html"> Appendix: HBaseDDLExecutor</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="installation-for-mapr">
<span id="admin-installation-mapr"></span><h1>Installation for MapR<a class="headerlink" href="#installation-for-mapr" title="Permalink to this headline">🔗</a></h1>
<div class="admin-manual-step-images container">
<a class="reference internal" href="#preparing-the-cluster"><img alt="step-1" class="align-top" src="../_images/step-1.png" style="width: 166.0px; height: 90.0px;" /></a><a class="reference internal" href="#downloading-and-distributing-packages"><img alt="step-2" class="align-top" src="../_images/step-2.png" style="width: 230.0px; height: 90.0px;" /></a><a class="reference internal" href="#installing-cdap-services"><img alt="step-3" class="align-top" src="../_images/step-3.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#starting-cdap-services"><img alt="step-4" class="align-top" src="../_images/step-4.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#verification"><img alt="step-5" class="align-top" src="../_images/step-5.png" style="width: 165.0px; height: 90.0px;" /></a></div>
<div class="section" id="preparing-the-cluster">
<h2>Preparing the Cluster<a class="headerlink" href="#preparing-the-cluster" title="Permalink to this headline">🔗</a></h2>
<p>Please review the <a class="reference internal" href="../system-requirements.html#admin-manual-software-requirements"><span class="std std-ref">Software Prerequisites</span></a>, as a
configured Hadoop, HBase, and Hive (plus an optional Spark client) <a class="reference external" href="https://www.mapr.com">MapR Converged Data
Platform</a> cluster needs to be available for the node(s) where CDAP
will run.</p>
<p>If you are installing CDAP with the intention of using <em>replication,</em> see these
instructions on <a class="reference internal" href="replication.html#installation-replication"><span class="std std-ref">CDAP Replication</span></a> <em>before</em> installing or starting CDAP.</p>
<p>If colocating CDAP on cluster hosts with actual services, such as the <em>MapR CLDB</em>, <em>YARN
ResourceManager</em>, or <em>HBase Master</em>, then the client configurations will already be in place.</p>
<ul class="simple">
<li>To configure a MapR client, see the MapR documentation on <a class="reference external" href="http://doc.mapr.com/display/MapR/Setting+Up+the+Client">Setting Up the Client</a>.</li>
<li>To configure a MapR HBase client, see the MapR documentation on <a class="reference external" href="http://doc.mapr.com/display/MapR/Installing+HBase#InstallingHBase-HBaseonaClientInstallingHBaseonaClient">Installing HBase on a Client</a>.</li>
<li>To configure a MapR Hive client, see the MapR documentation on <a class="reference external" href="http://doc.mapr.com/display/MapR/Installing+Hive">Installing Hive</a>.</li>
</ul>
<p>A typical client node should have the <code class="docutils literal notranslate"><span class="pre">mapr-client</span></code>, <code class="docutils literal notranslate"><span class="pre">mapr-hbase</span></code>, and <code class="docutils literal notranslate"><span class="pre">mapr-hive</span></code>
packages installed, and can be configured using the MapR <a class="reference external" href="http://doc.mapr.com/display/MapR/configure.sh">configure.sh</a> utility.</p>
<div class="section" id="hadoop-configuration">
<h3>Hadoop Configuration<a class="headerlink" href="#hadoop-configuration" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic">
<li><p class="first">ZooKeeper’s <code class="docutils literal notranslate"><span class="pre">maxClientCnxns</span></code> must be raised from its default.  We suggest setting it to zero
(0: unlimited connections). As each YARN container launched by CDAP makes a connection to ZooKeeper,
the number of connections required is a function of usage.</p>
</li>
<li><p class="first">Ensure that YARN has sufficient memory capacity by lowering the default minimum container
size (controlled by the property <code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>). Lack of
YARN memory capacity is the leading cause of apparent failures that we see reported.
We recommend starting with these settings:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code>: 43200 <em>(see note below)</em></li>
<li><code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>: 512 MB</li>
</ul>
<p>The value we recommend for <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code> (<code class="docutils literal notranslate"><span class="pre">43200</span></code> or 12
hours) is what we use internally at Cask for testing as that provides adequate time to
capture the logs of any failures. However, you should use an appropriate non-zero value
specific to your environment. A large value can be expensive from a storage perspective.</p>
<p>Please ensure your <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.cpu-vcores</span></code> and
<code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.memory-mb</span></code> settings are set sufficiently to run CDAP,
as described in the <a class="reference internal" href="../system-requirements.html#admin-manual-memory-core-requirements"><span class="std std-ref">CDAP Memory and Core Requirements</span></a>.</p>
</li>
</ol>
</div>
<div class="section" id="create-the-cdap-user">
<h3>Create the “cdap” User<a class="headerlink" href="#create-the-cdap-user" title="Permalink to this headline">🔗</a></h3>
<p>To prepare your cluster for CDAP, manually create a <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user on all nodes of the
cluster. As <em>“…MapR uses each node’s native operating system configuration to
authenticate users and groups for access to the cluster…[MapR documentation]”,</em> make
sure that the UID and GID for the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user are the same on each node of the cluster:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> id cdap
<span class="go">uid=503(cdap) gid=504(cdap) groups=504(cdap)</span>
</pre></div>
</div>
<p><em>Note:</em> The values returned by <code class="docutils literal notranslate"><span class="pre">id</span> <span class="pre">cdap</span></code> may differ from those shown, depending on your system.</p>
<p>When installing CDAP on an edge node, the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> system user is only created locally. As
Hadoop resolves users at the NameNode, the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user must also be added there, or name
resolution for the user will fail.</p>
<p>See the MapR documentation (<a class="reference external" href="http://maprdocs.mapr.com/home/AdvancedInstallation/PreparingEachNode-connectivity.html">Common Users</a>)
for more information.</p>
</div>
<div class="section" id="hdfs-permissions">
<span id="mapr-hdfs-permissions"></span><h3>HDFS Permissions<a class="headerlink" href="#hdfs-permissions" title="Permalink to this headline">🔗</a></h3>
<p>Ensure YARN is configured properly to run MapReduce programs.  Often, this includes
ensuring that the HDFS <code class="docutils literal notranslate"><span class="pre">/user/yarn</span></code> and <code class="docutils literal notranslate"><span class="pre">/user/cdap</span></code> directories exist with proper
permissions:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -mkdir -p /user/yarn &amp;&amp; hadoop fs -chown yarn:yarn /user/yarn
<span class="gp">$</span> hadoop fs -mkdir -p /user/cdap &amp;&amp; hadoop fs -chown cdap:cdap /user/cdap
</pre>
</div>
<p>Ensure the Hive DB directory is configured properly to allow CDAP to create Hive tables.
The Hive DB directory (default <code class="docutils literal notranslate"><span class="pre">/user/hive/</span></code>) is initially accessible only to the <code class="docutils literal notranslate"><span class="pre">mapr</span></code> user.
Change the permissions on this directory:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -chmod <span class="m">1777</span> /user/hive/
</pre></div>
</div>
<p>When changing HDFS permissions on MapR, always use the <code class="docutils literal notranslate"><span class="pre">mapr</span></code> user as shown above.</p>
</div>
<div class="section" id="install-cdap-compatible-spark">
<h3>Install CDAP-compatible Spark<a class="headerlink" href="#install-cdap-compatible-spark" title="Permalink to this headline">🔗</a></h3>
<p>If MapR is installed using MapR’s installer, the default Spark is version 2.0.1. This
version is not currently supported by CDAP. We <a class="reference internal" href="../hadoop-compatibility.html#admin-manual-hadoop-compatibility-matrix-optional"><span class="std std-ref">currently support</span></a> the latest 1.x version of Spark. In
order to use Spark, it needs to be manually installed through packages, as described in
the <a class="reference external" href="http://maprdocs.mapr.com/home/AdvancedInstallation/InstallSparkonYARN.html">MapR documentation</a>.</p>
</div>
</div>
<div class="section" id="downloading-and-distributing-packages">
<h2>Downloading and Distributing Packages<a class="headerlink" href="#downloading-and-distributing-packages" title="Permalink to this headline">🔗</a></h2>
<table border="1" class="docutils" id="mapr-compatibility-matrix">
<colgroup>
<col width="34%" />
<col width="66%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head" colspan="2">Supported MapR Distributions for Apache Hadoop</th>
</tr>
<tr class="row-even"><th class="head">CDAP Series</th>
<th class="head">MapR Distributions</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-odd"><td>CDAP 4.1.x</td>
<td>MapR 4.1 through MapR 5.2</td>
</tr>
<tr class="row-even"><td>CDAP 4.0.x</td>
<td>MapR 4.1 through MapR 5.2</td>
</tr>
<tr class="row-odd"><td>CDAP 3.6.x</td>
<td>MapR 4.1 through MapR 5.2</td>
</tr>
<tr class="row-even"><td>CDAP 3.5.x</td>
<td>MapR 4.1 through MapR 5.2</td>
</tr>
<tr class="row-odd"><td>CDAP 3.4.x</td>
<td>MapR 4.1 through MapR 5.1</td>
</tr>
<tr class="row-even"><td>CDAP 3.3.x</td>
<td>MapR 4.1 through MapR 5.1</td>
</tr>
<tr class="row-odd"><td>CDAP 3.2.x</td>
<td>MapR 4.1, MapR 5.0</td>
</tr>
<tr class="row-even"><td>CDAP 3.1.x</td>
<td>MapR 4.1</td>
</tr>
</tbody>
</table>
<div class="section" id="preparing-package-managers">
<span id="mapr-compatibility-matrix-end"></span><h3>Preparing Package Managers<a class="headerlink" href="#preparing-package-managers" title="Permalink to this headline">🔗</a></h3>
<p>CDAP components are available as either Yum <code class="docutils literal notranslate"><span class="pre">.rpm</span></code> or APT <code class="docutils literal notranslate"><span class="pre">.deb</span></code> packages. There is
one package for each CDAP component, and each component may have multiple services.
Additionally, a base CDAP package, installed along with three utility packages (for HBase
compatibility), creates the base configuration and the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user.</p>
<p>We provide packages for <em>Ubuntu 12.04+</em> and <em>CentOS 6.0+</em>. While these are the only
packages we currently provide, they contain no distribution version-specific code, and the
same packages will work on equivalent OSes.</p>
<p>Available packaging types:</p>
<ul class="simple">
<li>RPM: Yum repo</li>
<li>Debian: APT repo</li>
<li>Tar: For specialized installations only</li>
</ul>
<p><strong>Note:</strong> If you are using <a class="reference external" href="https://www.getchef.com">Chef</a> to install CDAP, an
<a class="reference external" href="https://supermarket.getchef.com/cookbooks/cdap">official cookbook is available</a>.</p>
<div class="section" id="on-rpm-using-yum">
<span id="mapr-install-rpm-using-yum"></span><h4>On RPM using Yum<a class="headerlink" href="#on-rpm-using-yum" title="Permalink to this headline">🔗</a></h4>
<p>Download the Cask Yum repo definition file:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo curl -o /etc/yum.repos.d/cask.repo http://repository.cask.co/centos/6/x86_64/cdap/6.1/cask.repo
</pre>
</div>
<p>This will create the file <code class="docutils literal notranslate"><span class="pre">/etc/yum.repos.d/cask.repo</span></code> with:</p>
<pre class="literal-block">
[cask]
name=Cask Packages
baseurl=https://repository.cask.co/centos/6/x86_64/cdap/6.1
enabled=1
gpgcheck=1
</pre>
<p>Add the Cask Public GPG Key to your repository:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo rpm --import http://repository.cask.co/centos/6/x86_64/cdap/6.1/pubkey.gpg
</pre>
</div>
<p>Update your Yum cache:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo yum makecache
</pre></div>
</div>
</div>
<div class="section" id="on-debian-using-apt">
<h4>On Debian using APT<a class="headerlink" href="#on-debian-using-apt" title="Permalink to this headline">🔗</a></h4>
<p>Download the Cask APT repo definition file:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo curl -o /etc/apt/sources.list.d/cask.list http://repository.cask.co/ubuntu/precise/amd64/cdap/6.1/cask.list
</pre>
</div>
<p>This will create the file <code class="docutils literal notranslate"><span class="pre">/etc/apt/sources.list.d/cask.list</span></code> with:</p>
<pre class="literal-block">
deb [ arch=amd64 ] http://repository.cask.co/ubuntu/precise/amd64/cdap/6.1 precise cdap
</pre>
<p>Add the Cask Public GPG Key to your repository:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> curl -s http://repository.cask.co/ubuntu/precise/amd64/cdap/6.1/pubkey.gpg | sudo apt-key add -
</pre>
</div>
<p>Update your APT cache:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo apt-get update
</pre></div>
</div>
</div>
<div class="section" id="using-tar">
<span id="mapr-install-using-tar"></span><h4>Using Tar<a class="headerlink" href="#using-tar" title="Permalink to this headline">🔗</a></h4>
<p>Download the appropriate CDAP tar file, and then unpack it to an appropriate directory (indicated by <code class="docutils literal notranslate"><span class="pre">$dir</span></code>):</p>

<script type="text/javascript">

  // jQuery ready callback: attaches a click handler to each tab header
  // (RHEL, Ubuntu) inside the "tabbedparsedliteral1" widget.
  // changeExampleTab is defined outside this page section; it is assumed to
  // return the click handler for the given tab -- verify against its source.
  $(function tabbedparsedliteral1() {
    var widgetId = "tabbedparsedliteral1";
    var tabSetID = 'rhel-ubuntu';
    var mapping = {'rhel': 'rhel', 'ubuntu': 'ubuntu'};

    ['rhel', 'ubuntu'].forEach(function (tabName) {
      // Resolve the tab header first, then build and bind its handler.
      var tabHeader = $("#tabbedparsedliteral1 .example-tab-" + tabName);
      tabHeader.click(changeExampleTab(tabName, mapping, widgetId, tabSetID));
    });
  });

</script>
<div id="tabbedparsedliteral1" class="tabbed-parsed-literal dependent-rhel-ubuntu">
<ul class="tabbed-parsed-literal nav-tabs">
<li class="example-tab example-tab-rhel active"><a href="#">RHEL</a></li>
<li class="example-tab example-tab-ubuntu "><a href="#">Ubuntu</a></li>
</ul>

<div class="tab-contents">

<div class="tab-pane tab-pane-rhel active">
<div class="code code-tab">
<div class="highlight-console">
<!-- tabbed-parsed-literal start -->
<div class="highlight"><pre><span></span><span class="gp">$</span> curl -O http://downloads.cask.co/cdap-distributed-rpm-bundle/cdap-distributed-rpm-bundle-6.1.1.tgz
<span class="gp">$ </span><span class="copyable-text">tar xf cdap-distributed-rpm-bundle-6.1.1.tgz -C <span class="nv">$dir</span></span>
</pre></div>
<!-- tabbed-parsed-literal end --></div>
</div>
</div>
<div class="tab-pane tab-pane-ubuntu ">
<div class="code code-tab">
<div class="highlight-console">
<!-- tabbed-parsed-literal start -->
<div class="highlight"><pre><span></span><span class="gp">$</span> curl -O http://downloads.cask.co/cdap-distributed-deb-bundle/cdap-distributed-deb-bundle-6.1.1.tgz
<span class="gp">$ </span><span class="copyable-text">tar xf cdap-distributed-deb-bundle-6.1.1.tgz -C <span class="nv">$dir</span></span>
</pre></div>
<!-- tabbed-parsed-literal end --></div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="section" id="installing-cdap-services">
<h2>Installing CDAP Services<a class="headerlink" href="#installing-cdap-services" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="package-installation">
<h3>Package Installation<a class="headerlink" href="#package-installation" title="Permalink to this headline">🔗</a></h3>
<p>Install the CDAP packages by using one of the following methods. Do this on each of the
boxes that are being used for the CDAP components; our recommended installation is a
minimum of two boxes.</p>
<p>This will download and install the latest version of CDAP with all of its dependencies.</p>
<p id="mapr-cli-package-installation">To install the optional <span class="xref std std-ref">CDAP CLI</span> on a node, add the <code class="docutils literal notranslate"><span class="pre">cdap-cli</span></code> package to
the list of packages in the commands below.</p>
<div class="section" id="using-chef">
<h4>Using Chef<a class="headerlink" href="#using-chef" title="Permalink to this headline">🔗</a></h4>
<p>If you are using <a class="reference external" href="https://www.getchef.com">Chef</a> to install CDAP, an <a class="reference external" href="https://supermarket.getchef.com/cookbooks/cdap">official
cookbook is available</a>.</p>
<p>To install the optional <span class="xref std std-ref">CDAP CLI</span> on a node, use the <code class="docutils literal notranslate"><span class="pre">fullstack</span></code> recipe.</p>
</div>
<div class="section" id="id1">
<h4>On RPM using Yum<a class="headerlink" href="#id1" title="Permalink to this headline">🔗</a></h4>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo yum install cdap-gateway cdap-kafka cdap-master cdap-security cdap-ui
</pre></div>
</div>
</div>
<div class="section" id="id2">
<h4>On Debian using APT<a class="headerlink" href="#id2" title="Permalink to this headline">🔗</a></h4>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo apt-get install cdap-gateway cdap-kafka cdap-master cdap-security cdap-ui
</pre></div>
</div>
</div>
<div class="section" id="id3">
<h4>Using Tar<a class="headerlink" href="#id3" title="Permalink to this headline">🔗</a></h4>
<p>Having <a class="reference internal" href="#mapr-install-using-tar"><span class="std std-ref">previously downloaded and unpacked</span></a>
the appropriate tar file to a directory <code class="docutils literal notranslate"><span class="pre">$dir</span></code>, use:</p>

<script type="text/javascript">

  // jQuery ready callback: attaches a click handler to each tab header
  // (RHEL, Ubuntu) inside the "tabbedparsedliteral2" widget.
  // changeExampleTab is defined outside this page section; it is assumed to
  // return the click handler for the given tab -- verify against its source.
  $(function tabbedparsedliteral2() {
    var widgetId = "tabbedparsedliteral2";
    var tabSetID = 'rhel-ubuntu';
    var mapping = {'rhel': 'rhel', 'ubuntu': 'ubuntu'};

    ['rhel', 'ubuntu'].forEach(function (tabName) {
      // Resolve the tab header first, then build and bind its handler.
      var tabHeader = $("#tabbedparsedliteral2 .example-tab-" + tabName);
      tabHeader.click(changeExampleTab(tabName, mapping, widgetId, tabSetID));
    });
  });

</script>
<div id="tabbedparsedliteral2" class="tabbed-parsed-literal dependent-rhel-ubuntu">
<ul class="tabbed-parsed-literal nav-tabs">
<li class="example-tab example-tab-rhel active"><a href="#">RHEL</a></li>
<li class="example-tab example-tab-ubuntu "><a href="#">Ubuntu</a></li>
</ul>

<div class="tab-contents">

<div class="tab-pane tab-pane-rhel active">
<div class="code code-tab">
<div class="highlight-console">
<!-- tabbed-parsed-literal start -->
<div class="highlight"><pre><span></span><span class="gp">$</span> sudo yum localinstall <span class="nv">$dir</span>/*.rpm
</pre></div>
<!-- tabbed-parsed-literal end --></div>
</div>
</div>
<div class="tab-pane tab-pane-ubuntu ">
<div class="code code-tab">
<div class="highlight-console">
<!-- tabbed-parsed-literal start -->
<div class="highlight"><pre><span></span><span class="gp">$</span> sudo dpkg -i <span class="nv">$dir</span>/*.deb
<span class="gp">$ </span><span class="copyable-text">sudo apt-get install -f</span>
</pre></div>
<!-- tabbed-parsed-literal end --></div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="section" id="create-required-directories">
<h3>Create Required Directories<a class="headerlink" href="#create-required-directories" title="Permalink to this headline">🔗</a></h3>
<p>To prepare your cluster so that CDAP can write to its default namespace,
create a top-level <code class="docutils literal notranslate"><span class="pre">/cdap</span></code> directory in MapRFS, owned by the MapRFS user <code class="docutils literal notranslate"><span class="pre">cdap</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -mkdir -p /cdap <span class="o">&amp;&amp;</span> hadoop fs -chown cdap /cdap
</pre></div>
</div>
<p>In the CDAP packages, the default property <code class="docutils literal notranslate"><span class="pre">hdfs.namespace</span></code> is <code class="docutils literal notranslate"><span class="pre">/cdap</span></code> and the default property
<code class="docutils literal notranslate"><span class="pre">hdfs.user</span></code> is <code class="docutils literal notranslate"><span class="pre">yarn</span></code>.</p>
<p>Also, create a <code class="docutils literal notranslate"><span class="pre">tx.snapshot</span></code> subdirectory:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -mkdir -p /cdap/tx.snapshot <span class="o">&amp;&amp;</span> hadoop fs -chown cdap /cdap/tx.snapshot
</pre></div>
</div>
<p><strong>Note:</strong> If you have customized (or will be customizing) the property
<code class="docutils literal notranslate"><span class="pre">data.tx.snapshot.dir</span></code> in your <a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-site-xml"><span class="std std-ref">CDAP configuration</span></a>, use
that value instead for <code class="docutils literal notranslate"><span class="pre">/cdap/tx.snapshot</span></code>.</p>
</div>
<div class="section" id="cdap-configuration">
<span id="mapr-configuration-central"></span><span id="mapr-configuration"></span><h3>CDAP Configuration<a class="headerlink" href="#cdap-configuration" title="Permalink to this headline">🔗</a></h3>
<p>This section describes how to configure the CDAP components so they work with your
existing Hadoop cluster. Certain Hadoop components may need changes, as described below,
for CDAP to run successfully.</p>
<ol class="arabic">
<li><p class="first">CDAP packages utilize a central configuration, stored by default in <code class="docutils literal notranslate"><span class="pre">/etc/cdap</span></code>.</p>
<p>When you install the CDAP base package, a default configuration is placed in
<code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf.dist</span></code>. The <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> file is a placeholder
where you can define your specific configuration for all CDAP components.
The <code class="docutils literal notranslate"><span class="pre">cdap-site.xml.example</span></code> file shows the properties that usually require customization
for all installations.</p>
<p id="mapr-configuration-alternatives">Similar to Hadoop, CDAP utilizes the <code class="docutils literal notranslate"><span class="pre">alternatives</span></code> framework to allow you to
easily switch between multiple configurations. The <code class="docutils literal notranslate"><span class="pre">alternatives</span></code> system is used for ease of
management and allows you to choose between different directories to fulfill the
same purpose.</p>
<p>Simply copy the contents of <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf.dist</span></code> into a directory of your choice
(such as <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf.mycdap</span></code>) and make all of your customizations there.
Then run the <code class="docutils literal notranslate"><span class="pre">alternatives</span></code> command to point the <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf</span></code> symlink
to your custom directory <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf.mycdap</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo cp -r /etc/cdap/conf.dist /etc/cdap/conf.mycdap
<span class="gp">$</span> sudo update-alternatives --install /etc/cdap/conf cdap-conf /etc/cdap/conf.mycdap <span class="m">10</span>
</pre></div>
</div>
</li>
<li id="mapr-configuration-options"><p class="first">Configure the <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> after you have installed the CDAP packages.</p>
<p>To configure your particular installation, modify <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code>, using
<code class="docutils literal notranslate"><span class="pre">cdap-site.xml.example</span></code> as a model. (See the <a class="reference internal" href="../appendices/minimal-cdap-site.html#appendix-minimal-cdap-site-xml"><span class="std std-ref">appendix</span></a> for a listing of <code class="docutils literal notranslate"><span class="pre">cdap-site.xml.example</span></code>,
the minimal <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> file required.)</p>
<p>Customize your configuration by creating (or editing, if it already exists) the <cite>.xml</cite> file
<code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code> and setting the appropriate properties:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo cp -f /etc/cdap/conf.mycdap/cdap-site.xml.example /etc/cdap/conf.mycdap/cdap-site.xml
<span class="gp">$</span> sudo vi /etc/cdap/conf.mycdap/cdap-site.xml
</pre></div>
</div>
</li>
<li><p class="first">If necessary, customize the file <code class="docutils literal notranslate"><span class="pre">cdap-env.sh</span></code> after you have installed the CDAP packages.</p>
<p>Environment variables that will be included in the environment used when launching CDAP
can be set in the <code class="docutils literal notranslate"><span class="pre">cdap-env.sh</span></code> file, usually at <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-env.sh</span></code>.</p>
<p>This is only necessary if you need to customize the environment launching CDAP, such
as described below under <a class="reference internal" href="#mapr-configuration-local-storage"><span class="std std-ref">Local Storage Configuration</span></a>.</p>
</li>
<li><p class="first">Due to an issue with the version of the Kafka ZooKeeper client shipped with MapR,
it is necessary to disable use of the embedded Kafka in CDAP by setting these properties:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
   <span class="nt">&lt;name&gt;</span>master.collect.containers.log<span class="nt">&lt;/name&gt;</span>
   <span class="nt">&lt;value&gt;</span>false<span class="nt">&lt;/value&gt;</span>
 <span class="nt">&lt;/property&gt;</span>

<span class="nt">&lt;property&gt;</span>
   <span class="nt">&lt;name&gt;</span>master.collect.app.containers.log.level<span class="nt">&lt;/name&gt;</span>
   <span class="nt">&lt;value&gt;</span>OFF<span class="nt">&lt;/value&gt;</span>
 <span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
<p>As a consequence of this setting, the container logs will not be streamed back to the
master process log file. This issue is due to a <a class="reference external" href="https://issues.apache.org/jira/browse/TWILL-139?focusedCommentId=14598628">known Kafka issue</a>.</p>
</li>
<li><p class="first">Depending on your installation, you may need to set these properties:</p>
</li>
</ol>
<blockquote>
<div><ol class="lowerroman">
<li><p class="first">Check that the <code class="docutils literal notranslate"><span class="pre">zookeeper.quorum</span></code> property in <code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code> is set to the
<strong>ZooKeeper quorum string</strong>, a comma-delimited list of fully-qualified domain names for
the ZooKeeper quorum:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>zookeeper.quorum<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>FQDN1:5181,FQDN2:5181/${root.namespace}<span class="nt">&lt;/value&gt;</span>
  <span class="nt">&lt;description&gt;</span>
    ZooKeeper quorum string; specifies the ZooKeeper host:port;
    substitute the quorum for the components shown here (FQDN1:5181,FQDN2:5181)
  <span class="nt">&lt;/description&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
<p><em>Note:</em> The MapR default ZooKeeper port of 5181 is different from the ZooKeeper default of 2181.</p>
</li>
<li><p class="first">Check that the <code class="docutils literal notranslate"><span class="pre">router.server.address</span></code> property in <code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code> is set to the
<strong>hostname of the CDAP Router</strong>. The CDAP UI uses this property to connect to the Router:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>router.server.address<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>{router-host-name}<span class="nt">&lt;/value&gt;</span>
  <span class="nt">&lt;description&gt;</span>CDAP Router address to which CDAP UI connects<span class="nt">&lt;/description&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">Check that there exists in HDFS a user directory for the <code class="docutils literal notranslate"><span class="pre">hdfs.user</span></code> property of <code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code>.
By default, the HDFS user is <code class="docutils literal notranslate"><span class="pre">cdap</span></code>. If necessary, create the directory:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -mkdir -p /user/<code class="docutils literal notranslate"><span class="pre">cdap</span></code> &amp;&amp; hadoop fs -chown <code class="docutils literal notranslate"><span class="pre">cdap</span></code>:<code class="docutils literal notranslate"><span class="pre">cdap</span></code> /user/<code class="docutils literal notranslate"><span class="pre">cdap</span></code>
</pre>
</div>
</li>
<li><p class="first">If you want to use <strong>an HDFS directory</strong> with a name other than <code class="docutils literal notranslate"><span class="pre">/cdap</span></code>:</p>
<ol class="arabic">
<li><p class="first">Create the HDFS directory you want to use, such as <code class="docutils literal notranslate"><span class="pre">/myhadoop/myspace</span></code>.</p>
</li>
<li><p class="first">Create an <code class="docutils literal notranslate"><span class="pre">hdfs.namespace</span></code> property for the HDFS directory in <code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code>:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hdfs.namespace<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>/myhadoop/myspace<span class="nt">&lt;/value&gt;</span>
  <span class="nt">&lt;description&gt;</span>Default HDFS namespace<span class="nt">&lt;/description&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">Check that the default HDFS user <code class="docutils literal notranslate"><span class="pre">cdap</span></code> owns that HDFS directory.</p>
</li>
</ol>
</li>
<li><p class="first">If you want to use <strong>an HDFS user</strong> other than <code class="docutils literal notranslate"><span class="pre">cdap</span></code>, such as <code class="docutils literal notranslate"><span class="pre">my_username</span></code>:</p>
<ol class="arabic">
<li><p class="first">Check that there is—and create if necessary—a corresponding user on all machines
in the cluster on which YARN is running (typically, all of the machines).</p>
</li>
<li><p class="first">Create an <code class="docutils literal notranslate"><span class="pre">hdfs.user</span></code> property for that user in <code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code>:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hdfs.user<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>my_username<span class="nt">&lt;/value&gt;</span>
  <span class="nt">&lt;description&gt;</span>User for accessing HDFS<span class="nt">&lt;/description&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">Check that the HDFS user owns the HDFS directory described by <code class="docutils literal notranslate"><span class="pre">hdfs.namespace</span></code> on all machines.</p>
</li>
<li><p class="first">Check that there exists in HDFS a <code class="docutils literal notranslate"><span class="pre">/user/</span></code> directory for that HDFS user, as described above, such as:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> su mapr
<span class="gp">$</span> hadoop fs -mkdir -p /user/<code class="docutils literal notranslate"><span class="pre">my_username</span></code> &amp;&amp; hadoop fs -chown <code class="docutils literal notranslate"><span class="pre">my_username</span></code>:<code class="docutils literal notranslate"><span class="pre">my_username</span></code> /user/<code class="docutils literal notranslate"><span class="pre">my_username</span></code>
</pre>
</div>
</li>
<li><p class="first">If you use an HDFS user other than <code class="docutils literal notranslate"><span class="pre">cdap</span></code>, you must use either a secure
cluster or use the <a class="reference external" href="https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/SecureContainer.html">LinuxContainerExecutor</a>
instead of the <code class="docutils literal notranslate"><span class="pre">DefaultContainerExecutor</span></code>. (Because of how <code class="docutils literal notranslate"><span class="pre">DefaultContainerExecutor</span></code>
works, other containers will launch as <code class="docutils literal notranslate"><span class="pre">cdap</span></code> rather than the specified
<code class="docutils literal notranslate"><span class="pre">hdfs.user</span></code>.) On Kerberos-enabled clusters, you must use <code class="docutils literal notranslate"><span class="pre">LinuxContainerExecutor</span></code>
as the <code class="docutils literal notranslate"><span class="pre">DefaultContainerExecutor</span></code> will not work correctly.</p>
</li>
</ol>
</li>
<li><p class="first">To use the <strong>ad-hoc querying capabilities of CDAP,</strong> ensure the cluster has a
compatible version of Hive installed. See the section on <a class="reference internal" href="../hadoop-compatibility.html#admin-manual-hadoop-compatibility-matrix"><span class="std std-ref">Hadoop Compatibility</span></a>. To use this feature on secure Hadoop
clusters, please see these instructions on <a class="reference internal" href="#mapr-configuration-enabling-kerberos"><span class="std std-ref">configuring secure Hadoop</span></a>.</p>
<p id="mapr-configuration-explore-service"><strong>Note:</strong> Some versions of Hive contain a bug that may prevent the CDAP Explore Service from starting
up. See <a class="reference external" href="https://issues.cask.co/browse/CDAP-1865">CDAP-1865</a> for more information about the issue.
If the CDAP Explore Service fails to start and you see a
<code class="docutils literal notranslate"><span class="pre">javax.jdo.JDODataStoreException:</span> <span class="pre">Communications</span> <span class="pre">link</span> <span class="pre">failure</span></code> in the log, try
adding this property to the Hive <code class="docutils literal notranslate"><span class="pre">hive-site.xml</span></code> file:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>datanucleus.connectionPoolingType<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>DBCP<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">If Hive is <strong>not</strong> going to be installed, disable the CDAP Explore Service in
<code class="docutils literal notranslate"><span class="pre">conf/cdap-site.xml</span></code> (by default, it is enabled):</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>explore.enabled<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>false<span class="nt">&lt;/value&gt;</span>
  <span class="nt">&lt;description&gt;</span>Enable Explore functionality<span class="nt">&lt;/description&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">If you’d like to publish metadata updates to an external Apache Kafka instance,
CDAP has the capability of publishing notifications upon metadata updates. Details on
the configuration settings and an example output are shown in the <a class="reference external" href="../../../developer-manual/metadata/audit-logging.html#audit-logging" title="(in Cask Data Application Platform v6.1.1)"><span class="xref std std-ref">Audit logging
section</span></a> of the Developer Manual.</p>
</li>
</ol>
</div></blockquote>
</div>
<div class="section" id="ulimit-configuration">
<span id="mapr-configuration-ulimit"></span><h3>ULIMIT Configuration<a class="headerlink" href="#ulimit-configuration" title="Permalink to this headline">🔗</a></h3>
<p>When you install the CDAP packages, the <code class="docutils literal notranslate"><span class="pre">ulimit</span></code> settings for the CDAP user are
specified in the <code class="docutils literal notranslate"><span class="pre">/etc/security/limits.d/cdap.conf</span></code> file. On Ubuntu, they won’t take
effect unless you make changes to the <code class="docutils literal notranslate"><span class="pre">/etc/pam.d/common-session</span></code> file. You can check
this setting with the command <code class="docutils literal notranslate"><span class="pre">ulimit</span> <span class="pre">-n</span></code> when logged in as the CDAP user.
For more information, refer to the <code class="docutils literal notranslate"><span class="pre">ulimit</span></code> discussion in the <a class="reference external" href="https://hbase.apache.org/book.html#ulimit">Apache HBase Reference
Guide</a>.</p>
</div>
<div class="section" id="local-storage-configuration">
<span id="mapr-configuration-tmp-files"></span><span id="mapr-configuration-local-storage"></span><h3>Local Storage Configuration<a class="headerlink" href="#local-storage-configuration" title="Permalink to this headline">🔗</a></h3>
<p>Local storage directories—depending on the distribution—are utilized
by CDAP for deploying applications and operating CDAP.</p>
<p>The CDAP user (the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> system user) <strong>must</strong> be able to write to <strong>all</strong> of these
directories, as they are used for deploying applications and for operating CDAP.</p>
<ul>
<li><p class="first"><strong>List of local storage directories</strong></p>
<ul class="simple">
<li>Properties specified in the <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> file, as described in the <a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-site-xml"><span class="std std-ref">Appendix: cdap-site.xml, cdap-default.xml</span></a>:<ul>
<li><code class="docutils literal notranslate"><span class="pre">app.temp.dir</span></code> (default: <code class="docutils literal notranslate"><span class="pre">/tmp</span></code>)</li>
<li><code class="docutils literal notranslate"><span class="pre">kafka.server.log.dirs</span></code> (default: <code class="docutils literal notranslate"><span class="pre">/tmp/kafka-logs</span></code>)</li>
<li><code class="docutils literal notranslate"><span class="pre">local.data.dir</span></code> (default: <code class="docutils literal notranslate"><span class="pre">data</span></code>; if this is instead an absolute path, needs to be writable)</li>
</ul>
</li>
<li>Additional directories:<ul>
<li><code class="docutils literal notranslate"><span class="pre">/var/cdap/run</span></code> (used as a PID directory, created by the packages)</li>
<li><code class="docutils literal notranslate"><span class="pre">/var/log/cdap</span></code> (used as log directory, created by the packages)</li>
<li><code class="docutils literal notranslate"><span class="pre">/var/run/cdap</span></code> (default CDAP user’s home directory, created by the packages)</li>
<li><code class="docutils literal notranslate"><span class="pre">/var/tmp/cdap</span></code> (default <code class="docutils literal notranslate"><span class="pre">LOCAL_DIR</span></code>—see below—defined and created in the CDAP init scripts)</li>
</ul>
</li>
</ul>
</li>
<li><p class="first">Note that <code class="docutils literal notranslate"><span class="pre">local.data.dir</span></code>—which defines the directory for program jar storage
when deploying to YARN—is set in the <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> and defaults to the
relative path <code class="docutils literal notranslate"><span class="pre">data</span></code>. If the value of <code class="docutils literal notranslate"><span class="pre">local.data.dir</span></code> is <em>relative,</em> it is put
under <code class="docutils literal notranslate"><span class="pre">LOCAL_DIR</span></code>, such as <code class="docutils literal notranslate"><span class="pre">/var/tmp/cdap/data</span></code>. However, if instead it is an
<em>absolute</em> path, that alone is used as the value. This is desirable so you can easily
configure this directory to be elsewhere.</p>
</li>
<li><p class="first">The CDAP Master service is governed by environment variables, which set the
directories it uses:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">TEMP_DIR</span></code> (default: <code class="docutils literal notranslate"><span class="pre">/tmp</span></code>): The directory serving as the <code class="docutils literal notranslate"><span class="pre">java.io.tmpdir</span></code>
directory</li>
<li><code class="docutils literal notranslate"><span class="pre">LOCAL_DIR</span></code> (default: <code class="docutils literal notranslate"><span class="pre">/var/tmp/cdap</span></code>): The directory serving as the user directory
for CDAP Master</li>
</ul>
<p>These variables can be set in the file <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-env.sh</span></code> and will be included in
the environment when launching CDAP. See <a class="reference internal" href="#mapr-configuration-central"><span class="std std-ref">CDAP Configuration</span></a>
for details of the central configuration used by CDAP and how to implement this.</p>
</li>
<li><p class="first">As in all installations, the <code class="docutils literal notranslate"><span class="pre">kafka.server.log.dirs</span></code> may need to be created locally.
If you configure <code class="docutils literal notranslate"><span class="pre">kafka.server.log.dirs</span></code> (or any of the other settable parameters) to
a particular directory or directories, you need to make sure that <strong>the directories
exist</strong> and that they <strong>are writable</strong> by the CDAP user.</p>
</li>
</ul>
</div>
<div class="section" id="yarn-application-classpath">
<h3>YARN Application Classpath<a class="headerlink" href="#yarn-application-classpath" title="Permalink to this headline">🔗</a></h3>
<p>CDAP requires that an additional entry—<code class="docutils literal notranslate"><span class="pre">/opt/mapr/lib/*</span></code>—be appended to the
<code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code> setting of <code class="docutils literal notranslate"><span class="pre">yarn-site.xml</span></code>. (This file is usually in
<code class="docutils literal notranslate"><span class="pre">/opt/mapr/hadoop/hadoop-&lt;hadoop-version&gt;/etc/hadoop/yarn-site.xml</span></code>.) The default
<code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code> for Linux with this additional entry appended is
(reformatted to fit):</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span>$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_YARN_HOME/share/hadoop/yarn/*,
$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
/opt/mapr/lib/*
</pre></div>
</div>
<p><strong>Notes:</strong></p>
<ul class="simple">
<li>Since MapR might not dereference the Hadoop variables (such as <code class="docutils literal notranslate"><span class="pre">$HADOOP_CONF_DIR</span></code>)
correctly, we recommend specifying their full paths instead of the variables we have
included here.</li>
<li>MapR does not, by default, provide a configured <code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code>, and
you will need to add this entry to <code class="docutils literal notranslate"><span class="pre">yarn-site.xml</span></code>. If you install using <a class="reference external" href="https://www.getchef.com">Chef</a>, that file and entry are created automatically, but not
with dereferenced Hadoop variables.</li>
</ul>
</div>
</div>
<div class="section" id="starting-cdap-services">
<span id="mapr-starting-services"></span><h2>Starting CDAP Services<a class="headerlink" href="#starting-cdap-services" title="Permalink to this headline">🔗</a></h2>
<p>When all the packages and dependencies have been installed, and the configuration
parameters set, you can start the services on each of the CDAP boxes by running the
command:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> <span class="k">for</span> i in <span class="sb">`</span>ls /etc/init.d/ <span class="p">|</span> grep cdap<span class="sb">`</span> <span class="p">;</span> <span class="k">do</span> sudo service <span class="nv">$i</span> start <span class="p">;</span> <span class="k">done</span>
</pre></div>
</div>
<p>When all the services have completed starting, the CDAP UI should then be
accessible through a browser at port <code class="docutils literal notranslate"><span class="pre">11011</span></code>.</p>
<p>The URL will be <code class="docutils literal notranslate"><span class="pre">http://&lt;host&gt;:11011</span></code> where <code class="docutils literal notranslate"><span class="pre">&lt;host&gt;</span></code> is the IP address of
one of the machines where you installed the packages and started the services.</p>
<p id="mapr-starting-services-java-heapmax"><strong>Note:</strong> Service-specific Java heap memory settings (that override the default values)
can be created by setting these environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">AUTH_JAVA_HEAPMAX</span>
<span class="go">KAFKA_JAVA_HEAPMAX</span>
<span class="go">MASTER_JAVA_HEAPMAX</span>
<span class="go">ROUTER_JAVA_HEAPMAX</span>
</pre></div>
</div>
<p>such as:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> <span class="nb">export</span> <span class="nv">AUTH_JAVA_HEAPMAX</span><span class="o">=</span><span class="s2">&quot;-Xmx1024m&quot;</span>
</pre></div>
</div>
<p>Add any overriding settings to a file, usually <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-env.sh</span></code>. As
described above (in <a class="reference internal" href="#mapr-configuration"><span class="std std-ref">CDAP Configuration</span></a>), the location of this file will
depend on your particular configuration.</p>
</div>
<div class="section" id="verification">
<span id="mapr-verification"></span><h2>Verification<a class="headerlink" href="#verification" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="cdap-smoke-test">
<h3>CDAP Smoke Test<a class="headerlink" href="#cdap-smoke-test" title="Permalink to this headline">🔗</a></h3>
<p>The CDAP UI may initially show errors while all of the CDAP YARN containers are
starting up. Allow for up to a few minutes for this.</p>
<p>The <em>Administration</em> page of the CDAP UI shows the status of the CDAP services.
It can be reached at <code class="docutils literal notranslate"><span class="pre">http://&lt;cdap-host&gt;:11011/cdap/administration</span></code>, substituting for
<code class="docutils literal notranslate"><span class="pre">&lt;cdap-host&gt;</span></code> the host name or IP address of the CDAP server:</p>
<div class="figure align-center" id="id4" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/console-distributed.png"><img alt="CDAP UI Administration page showing the status of the CDAP services" class="bordered-image" src="../_images/console-distributed.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>CDAP UI:</strong> Showing started-up, <em>Administration</em> page.</span></p>
</div>
</div>
</div>
<div class="section" id="advanced-topics">
<span id="mapr-installation-advanced-topics"></span><h2>Advanced Topics<a class="headerlink" href="#advanced-topics" title="Permalink to this headline">🔗</a></h2>
<ul class="simple">
<li><a class="reference internal" href="#mapr-configuration-security"><span class="std std-ref">Enabling Security</span></a></li>
<li><a class="reference internal" href="#mapr-configuration-enabling-kerberos"><span class="std std-ref">Enabling Kerberos</span></a></li>
<li><a class="reference internal" href="#mapr-configuration-highly-available"><span class="std std-ref">Enabling CDAP High Availability</span></a></li>
<li><a class="reference internal" href="#mapr-configuration-enabling-hive-execution-engines"><span class="std std-ref">Enabling Hive Execution Engines</span></a></li>
</ul>
<span class="target" id="mapr-configuration-security"></span><div class="section" id="enabling-security">
<h3>Enabling Security<a class="headerlink" href="#enabling-security" title="Permalink to this headline">🔗</a></h3>
<p>Cask Data Application Platform (CDAP) supports securing clusters using perimeter security, authorization,
impersonation and secure storage.</p>
<p>Network (or cluster) perimeter security limits outside access, providing a first level of
security. However, perimeter security itself does not provide the safeguards of authentication,
authorization and service request management that a secure Hadoop cluster provides.</p>
<p>Authorization provides a way of enforcing access control on CDAP entities.</p>
<p>Impersonation ensures that programs inside CDAP are run as configured users at the namespace level. When enabled, it
guarantees that all actions on datasets, streams and other resources happen as the configured user.</p>
<p>We recommend that in order for CDAP to be secure, CDAP security should always be used in conjunction with
<a class="reference external" href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html">secure Hadoop clusters</a>.
In cases where secure Hadoop is not or cannot be used, the cluster is inherently insecure and any applications
running on the cluster are effectively “trusted”. Although there is still value in having perimeter security,
authorization enforcement and secure storage in that situation, whenever possible a secure Hadoop
cluster should be employed with CDAP security.</p>
<p>For instructions on enabling CDAP Security, see <a class="reference internal" href="../security/index.html#admin-security"><span class="std std-ref">CDAP Security</span></a>.</p>
<span class="target" id="mapr-configuration-enabling-kerberos"></span></div>
<div class="section" id="enabling-kerberos">
<h3>Enabling Kerberos<a class="headerlink" href="#enabling-kerberos" title="Permalink to this headline">🔗</a></h3>
<p>When running CDAP on top of a secure Hadoop cluster (using Kerberos authentication), the
CDAP processes will need to obtain Kerberos credentials in order to authenticate with
Hadoop, HBase, ZooKeeper, and (optionally) Hive.  In this case, the setting for
<code class="docutils literal notranslate"><span class="pre">hdfs.user</span></code> in <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> will be ignored and the CDAP processes will be
identified by the default authenticated Kerberos principal.</p>
<p><strong>Note:</strong> CDAP support for secure Hadoop clusters is limited to the latest versions of
CDH, HDP, MapR, and Apache BigTop; currently, Amazon EMR is not supported on secure Hadoop
clusters.</p>
<ol class="upperalpha">
<li><p class="first">In order to configure <strong>CDAP for Kerberos authentication:</strong></p>
<ol class="arabic">
<li><p class="first">Create a Kerberos principal for the user running CDAP.  The principal name should be in
the form <code class="docutils literal notranslate"><span class="pre">username/hostname&#64;REALM</span></code>, creating a separate principal for each host
where a CDAP service will run.  This prevents simultaneous login attempts from
multiple hosts from being mistaken for a replay attack by the Kerberos KDC.</p>
</li>
<li><p class="first">Generate a keytab file for each CDAP Master Kerberos principal, and place the file as
<code class="docutils literal notranslate"><span class="pre">/etc/security/keytabs/cdap.service.keytab</span></code> on the corresponding CDAP Master host.  The
file should be readable only by the user running the CDAP Master service.</p>
</li>
<li><p class="first">Edit <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-site.xml</span></code> on each host running a CDAP service, substituting the Kerberos
primary (user) for <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code>, and your Kerberos authentication realm for <code class="docutils literal notranslate"><span class="pre">EXAMPLE.COM</span></code>,
when adding these two properties:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>cdap.master.kerberos.keytab<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>/etc/security/keytabs/cdap.service.keytab<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>

<span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>cdap.master.kerberos.principal<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;&lt;cdap-principal&gt;</span>/_HOST@EXAMPLE.COM<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code> is shown in the commands that follow as <code class="docutils literal notranslate"><span class="pre">cdap</span></code>;
however, you are free to use a different appropriate name.</p>
</li>
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">/cdap</span></code> directory needs to be owned by the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code>; you can set
that by running the following command as the <code class="docutils literal notranslate"><span class="pre">hdfs</span></code> user (change the ownership in the
command from <code class="docutils literal notranslate"><span class="pre">cdap</span></code> to whatever is the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code>):</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> su hdfs <span class="o">&amp;&amp;</span> hadoop fs -mkdir -p /cdap <span class="o">&amp;&amp;</span> hadoop fs -chown cdap /cdap
</pre></div>
</div>
</li>
<li><p class="first">When running on a secure HBase cluster, as the <code class="docutils literal notranslate"><span class="pre">hbase</span></code> user, issue the command:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> <span class="nb">echo</span> <span class="s2">&quot;grant &#39;cdap&#39;, &#39;RWCA&#39;&quot;</span> <span class="p">|</span> hbase shell
</pre></div>
</div>
</li>
<li><p class="first">When CDAP Master is started, it will log in using the configured keytab file and principal.</p>
</li>
</ol>
</li>
</ol>
<blockquote>
<div><ol class="arabic" start="8">
<li><p class="first">Edit <code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-env.sh</span></code> on each host running CDAP Master, adding:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">export OPTS=&quot;${OPTS} -Djava.security.auth.login.config=/opt/mapr/conf/mapr.login.conf -Dhadoop.login=hybrid -Dzookeeper.saslprovider=com.mapr.security.maprsasl.MaprSaslProvider&quot;</span>
</pre></div>
</div>
</li>
</ol>
</div></blockquote>
<ol class="upperalpha" start="2">
<li><p class="first">In order to configure <strong>YARN for secure Hadoop:</strong> the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code> user must be
able to launch YARN containers, either by adding it to the YARN <code class="docutils literal notranslate"><span class="pre">allowed.system.users</span></code>
whitelist (preferred) or by adjusting the YARN <code class="docutils literal notranslate"><span class="pre">min.user.id</span></code> to include the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code> user.</p>
</li>
<li><p class="first">In order to configure <strong>CDAP Explore Service for secure Hadoop:</strong></p>
<ol class="arabic">
<li><p class="first">To allow CDAP to act as a Hive client, it must be given <code class="docutils literal notranslate"><span class="pre">proxyuser</span></code> permissions and allowed from all hosts.
For example: set the following properties in the configuration file <code class="docutils literal notranslate"><span class="pre">core-site.xml</span></code>, where <code class="docutils literal notranslate"><span class="pre">cdap</span></code> is a system
group of which the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user is a member:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hadoop.proxyuser.hive.groups<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>cdap,hadoop,hive<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
<span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hadoop.proxyuser.hive.hosts<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>*<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">To execute Hive queries on a secure cluster, the cluster must be running the MapReduce <code class="docutils literal notranslate"><span class="pre">JobHistoryServer</span></code>
service. Consult your distribution documentation on the proper configuration of this service.</p>
</li>
<li><p class="first">To execute Hive queries on a secure cluster using the CDAP Explore Service, the Hive MetaStore service
must be configured for Kerberos authentication. Consult your distribution documentation on the proper
configuration of the Hive MetaStore service.</p>
</li>
</ol>
<p>With all these properties set, the CDAP Explore Service will run on secure Hadoop clusters.</p>
</li>
</ol>
</div>
<div class="section" id="enabling-cdap-ha">
<span id="mapr-configuration-highly-available"></span><span id="mapr-highly-available"></span><h3>Enabling CDAP HA<a class="headerlink" href="#enabling-cdap-ha" title="Permalink to this headline">🔗</a></h3>
<p>In addition to having a <a class="reference internal" href="../deployment-architectures.html#admin-manual-install-deployment-architectures-ha"><span class="std std-ref">cluster architecture</span></a>
that supports HA (high availability), these additional configuration steps need to be followed and completed:</p>
<div class="section" id="cdap-components">
<h4>CDAP Components<a class="headerlink" href="#cdap-components" title="Permalink to this headline">🔗</a></h4>
<p>For each of the CDAP components listed below (Master, Router, Kafka, UI, Authentication Server), these
comments apply:</p>
<ul class="simple">
<li>Sync the configuration files (such as <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> and <code class="docutils literal notranslate"><span class="pre">cdap-security.xml</span></code>) on all the nodes.</li>
<li>While the default <em>bind.address</em> settings (<code class="docutils literal notranslate"><span class="pre">0.0.0.0</span></code>, used for <code class="docutils literal notranslate"><span class="pre">app.bind.address</span></code>,
<code class="docutils literal notranslate"><span class="pre">data.tx.bind.address</span></code>, <code class="docutils literal notranslate"><span class="pre">router.bind.address</span></code>, and so on) can be synced across hosts,
if you customize them to a particular IP address, they will—as a result—be
different on different hosts.</li>
<li>Starting services is described in <a class="reference internal" href="#mapr-starting-services"><span class="std std-ref">Starting CDAP Services</span></a>.</li>
</ul>
</div>
<div class="section" id="cdap-master">
<h4>CDAP Master<a class="headerlink" href="#cdap-master" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Master service primarily performs coordination tasks and can be scaled for redundancy. The
instances coordinate amongst themselves, electing one as a leader at all times.</p>
<ul class="simple">
<li>Install the <code class="docutils literal notranslate"><span class="pre">cdap-master</span></code> package on different nodes.</li>
<li>Ensure they are configured identically (<code class="docutils literal notranslate"><span class="pre">/etc/cdap/conf/cdap-site.xml</span></code>).</li>
<li>Start the <code class="docutils literal notranslate"><span class="pre">cdap-master</span></code> service on each node.</li>
</ul>
</div>
<div class="section" id="cdap-router">
<h4>CDAP Router<a class="headerlink" href="#cdap-router" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Router service is a stateless API endpoint for CDAP, and simply routes requests to the
appropriate service. It can be scaled horizontally for performance. A load balancer, if
desired, can be placed in front of the nodes running the service.</p>
<ul class="simple">
<li>Install the <code class="docutils literal notranslate"><span class="pre">cdap-gateway</span></code> package on different nodes.</li>
<li>The <code class="docutils literal notranslate"><span class="pre">router.bind.address</span></code> may need to be customized on each box if it is not set to
the default wildcard address (<code class="docutils literal notranslate"><span class="pre">0.0.0.0</span></code>).</li>
<li>Start the <code class="docutils literal notranslate"><span class="pre">cdap-router</span></code> service on each node.</li>
</ul>
</div>
<div class="section" id="cdap-kafka">
<h4>CDAP Kafka<a class="headerlink" href="#cdap-kafka" title="Permalink to this headline">🔗</a></h4>
<ul>
<li><p class="first">Install the <code class="docutils literal notranslate"><span class="pre">cdap-kafka</span></code> package on different nodes.</p>
</li>
<li><p class="first">Two properties need to be set in the <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> files on each node:</p>
<ul>
<li><p class="first">The <strong>Kafka seed brokers list</strong> is a comma-separated list of hosts, followed by <code class="docutils literal notranslate"><span class="pre">/${root.namespace}</span></code>:</p>
<blockquote>
<div><p><code class="docutils literal notranslate"><span class="pre">kafka.seed.brokers</span></code>: <code class="docutils literal notranslate"><span class="pre">myhost.example.com:9092,.../${root.namespace}</span></code></p>
</div></blockquote>
<p>Substitute appropriate addresses for <code class="docutils literal notranslate"><span class="pre">myhost.example.com</span></code> in the above example.</p>
</li>
<li><p class="first">The <strong>replication factor</strong> is used to replicate Kafka messages across multiple
machines to prevent data loss in the event of a hardware failure:</p>
<blockquote>
<div><p><code class="docutils literal notranslate"><span class="pre">kafka.server.default.replication.factor</span></code>: 2</p>
</div></blockquote>
</li>
</ul>
</li>
<li><p class="first">The recommended setting is to run at least two Kafka brokers with a minimum replication
factor of two; set this property to the maximum number of tolerated machine failures
plus one (assuming you have that number of machines). For example, if you were running
five Kafka brokers, and would tolerate two of those failing, you would set the
replication factor to three. The number of Kafka brokers listed should always be equal to
or greater than the replication factor.</p>
</li>
<li><p class="first">Start the <code class="docutils literal notranslate"><span class="pre">cdap-kafka</span></code> service on each node.</p>
</li>
</ul>
</div>
<div class="section" id="cdap-ui">
<h4>CDAP UI<a class="headerlink" href="#cdap-ui" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Install the <code class="docutils literal notranslate"><span class="pre">cdap-ui</span></code> package on different nodes.</li>
<li>Start the <code class="docutils literal notranslate"><span class="pre">cdap-ui</span></code> service on each node.</li>
</ul>
</div>
<div class="section" id="cdap-authentication-server">
<h4>CDAP Authentication Server<a class="headerlink" href="#cdap-authentication-server" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Install the <code class="docutils literal notranslate"><span class="pre">cdap-security</span></code> package (the CDAP Authentication Server) on different nodes.</li>
<li>Start the <code class="docutils literal notranslate"><span class="pre">cdap-security</span></code> service on each node.</li>
<li>Note that when an unauthenticated request is made in a secure HA setup, a list of all
running authentication endpoints will be returned in the body of the response.</li>
</ul>
</div>
</div>
<div class="section" id="hive-execution-engines">
<span id="mapr-configuration-enabling-hive-execution-engines"></span><h3>Hive Execution Engines<a class="headerlink" href="#hive-execution-engines" title="Permalink to this headline">🔗</a></h3>
<p>CDAP Explore has support for additional execution engines such as
<a class="reference external" href="http://spark.apache.org/">Apache Spark</a> and
<a class="reference external" href="http://tez.apache.org/">Apache Tez</a>. Details on specifying these engines and
configuring CDAP are in the Developer Manual section on Data Exploration,
<a class="reference external" href="../../../developer-manual/data-exploration/hive-execution-engines.html#hive-ee" title="(in Cask Data Application Platform v6.1.1)"><span class="xref std std-ref">Hive Execution Engines</span></a>.</p>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Installation using Apache Ambari" href="ambari.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Installation on Microsoft Azure HDInsight" href="azure-hdinsight.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>